/* First file for extracting data in SQL */


use IDI_Clean_20181020 





-1-------------------------------------------------------------------------------------------------------------
-- pent permanent industries
--------------------------------------------------------------------------------------------------------------


-- save as csv Wgap_v3_indy.csv

SELECT 
	pent = ISNULL(IND.pent),
	anz06_4d									

FROM IDI_Sandpit.clean_read_IR.pent_ind_IDI_20181020_RFabling IND
ORDER BY pent

-- 1,563,XXX rows 










-2-------------------------------------------------------------------------------------------------------------
-- pent productivity data 
--------------------------------------------------------------------------------------------------------------

-- save as csv Wgap_v3b_prody.csv

SELECT 
	PROD.pent,
	dim_year_key,
	PROD.pf_ind,
	IND.anz06_4d, 
	go_nom, 
	M_nom, 
	K_nom, 
	RLR_nom,
	lnL
FROM [ibuldd_research_datalab].[STATSNZ\dl_RFabling].[pent_prod_IDI_20181020] PROD
LEFT JOIN IDI_Sandpit.clean_read_IR.pent_ind_IDI_20181020_RFabling IND
	ON PROD.pent = IND.pent

-- 3372,XXX rows 










--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
-- individual-year (to March inc) level data set for all employees and WPs, matched to firm that pays greatest
-- wage/salary in the year
--------------------------------------------------------------------------------------------------------------


-3-------------------------------------------------------------------------------------------------------------
-- table where an observation is an employee-pent-month, including only wage/salary income

CREATE TABLE #emp_emp_mon(	snz_uid					INT			NOT NULL,
				calendar_year				SMALLINT		NOT NULL,
				calendar_month				TINYINT			NOT NULL,
				financial_year				SMALLINT		NOT NULL, -- year ending March
				ir_ems_gross_earnings_amt		DECIMAL(13,2)		NOT NULL,
				pent					CHAR(10)		NOT NULL,
				WP					TINYINT			NOT NULL, -- indicator for ever a WP for the pent
				birth_yr				SMALLINT		NULL,
				birth_mon				TINYINT			NULL,
				sex					CHAR(1)			NULL
	PRIMARY KEY CLUSTERED (snz_uid,calendar_year,calendar_month,financial_year,pent))

INSERT INTO #emp_emp_mon(snz_uid, calendar_year, calendar_month, financial_year, ir_ems_gross_earnings_amt,
	pent, WP, birth_yr, birth_mon, sex)

SELECT  DISTINCT 
	EMS.snz_uid,
	calendar_year = year(ir_ems_return_period_date),
	calendar_month = month(ir_ems_return_period_date),
	financial_year = CASE WHEN month(ir_ems_return_period_date)<=3 THEN year(ir_ems_return_period_date) -- calendar year for Jan-March
						WHEN month(ir_ems_return_period_date)>3 THEN year(ir_ems_return_period_date) + 1 -- next year for April onwards
						ELSE NULL END,
	ir_ems_gross_earnings_amt = SUM(ir_ems_gross_earnings_amt),
	pent = ISNULL(PENT.pent,'ZZ' + convert(CHAR(8),snz_employer_ird_uid)),
	WP = CASE WHEN WP.snz_uid IS NOT NULL THEN 1 ELSE 0 END,
	birth_yr = MAX(SEX.snz_birth_year_nbr),
	birth_mon = MAX(SEX.snz_birth_month_nbr),
	female = MAX(CASE WHEN SEX.[snz_sex_code] = 1 then 0 WHEN SEX.[snz_sex_code] = 2 THEN 1 ELSE NULL END)
FROM ir_clean.ird_ems EMS
LEFT JOIN IDI_Sandpit.clean_read_IR.pent_IDI_20181020_RFabling PENT
	ON EMS.ir_ems_enterprise_nbr = PENT.enterprise_nbr
LEFT JOIN [data].[personal_detail] SEX
	ON EMS.snz_uid = SEX.snz_uid
LEFT JOIN (	SELECT DISTINCT
			pent,
			snz_uid
		FROM IDI_Sandpit.clean_read_IR.pent_WP_yr_IDI_20181020_RFabling WPIN
		GROUP BY pent, snz_uid) WP -- ever working proprietor for that pent
	ON EMS.snz_uid = WP.snz_uid
	AND PENT.pent = WP.pent
WHERE [ir_ems_income_source_code] = 'W&S'
	AND (100*year(ir_ems_return_period_date) + month(ir_ems_return_period_date) >= start_month OR start_month IS NULL)
	AND (100*year(ir_ems_return_period_date) + month(ir_ems_return_period_date) <= end_month OR end_month IS NULL)
GROUP BY EMS.snz_uid,  year(ir_ems_return_period_date), month(ir_ems_return_period_date), WP.snz_uid,
	ISNULL(PENT.pent,'ZZ' + convert(CHAR(8),snz_employer_ird_uid))


--439,831,XXX rows 







-4-------------------------------------------------------------------------------------------------------------

CREATE TABLE #emp_emp_mon2(	snz_uid					INT			NOT NULL,
				snz_ird_uid				INT			NOT NULL,
				calendar_year				SMALLINT		NOT NULL,
				calendar_month				TINYINT			NOT NULL,
				financial_year				SMALLINT		NOT NULL, -- March year
				dim_year_key				INT			NOT NULL, -- firm's own financial year + march
				ir_ems_gross_earnings_amt		DECIMAL(13,2)		NOT NULL,
				pent					CHAR(10)		NOT NULL,
				WP					TINYINT			NOT NULL, -- indicator for ever a WP for the pent
				fte_employee				FLOAT			NULL,	-- employee ftes
				fte_WP					FLOAT			NULL,	-- working proprietor pseudo ftes
				n_jobs					TINYINT			NULL,
				birth_yr				SMALLINT		NULL,
				birth_mon				TINYINT			NULL,
				sex					CHAR(1)			NULL
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,calendar_year,calendar_month,financial_year,pent))

INSERT INTO #emp_emp_mon2(snz_uid, snz_ird_uid, calendar_year, calendar_month, financial_year, 
	dim_year_key, ir_ems_gross_earnings_amt,
	pent, WP, fte_employee, fte_WP, n_jobs, birth_yr, birth_mon, sex)
SELECT   
	EMP.snz_uid,
	snz_ird_uid,
	EMP.calendar_year,
	calendar_month,
	financial_year,
	BAL.dim_year_key,
	ir_ems_gross_earnings_amt,
	EMP.pent,
	WP = CASE WHEN WP.snz_uid IS NOT NULL THEN 1 ELSE 0 END,
	fte_employee = fte,
	fte_WP = WPFTE.adj_WP_count,
	n_jobs,	-- number of jobs worked by employee in month (employees only)
	birth_yr = snz_birth_year_nbr,
	birth_mon = snz_birth_month_nbr,
	sex = CASE WHEN [snz_sex_code] = 1 then 'M' WHEN [snz_sex_code] = 2 THEN 'F' ELSE NULL END
FROM #emp_emp_mon EMP
LEFT JOIN (	SELECT DISTINCT
			pent,
			snz_uid
		FROM IDI_Sandpit.clean_read_IR.pent_WP_yr_IDI_20181020_RFabling
		GROUP BY pent, snz_uid) WP -- ever working proprietor for that pent
	ON EMP.snz_uid = WP.snz_uid
	AND EMP.pent = WP.pent
LEFT JOIN IDI_Sandpit.clean_read_IR.pent_emp_mth_FTE_IDI_20181020_RFabling FTE -- FTE info for workers
	ON EMP.snz_uid = FTE.snz_uid
	AND EMP.pent = FTE.pent
	AND 100*EMP.calendar_year + EMP.calendar_month = FTE.dim_month_key
LEFT JOIN IDI_Sandpit.[clean_read_IR].[pent_WP_yr_IDI_20181020_RFabling] WPFTE
	ON EMP.snz_uid = WPFTE.snz_uid
	AND EMP.pent = WPFTE.pent
	AND 100*EMP.financial_year + 3 = WPFTE.dim_year_key
LEFT JOIN [data].[personal_detail] SEX
	ON EMP.snz_uid = SEX.snz_uid

WHERE (100*EMP.calendar_year+EMP.calendar_month)>=BAL.[start_month_key] 
	AND (100*EMP.calendar_year+EMP.calendar_month)<=BAL.[end_month_key]






-5-------------------------------------------------------------------------------------------------------------
-- Part A: Collapsing to one observation per person-pent-financial year (all pents)

CREATE TABLE #emp_emp_yr(	snz_uid					INT			NOT NULL,
				snz_ird_uid				INT			NOT NULL,
				financial_year				SMALLINT		NOT NULL,
				gross_earn_yr				DECIMAL(13,2)		NOT NULL,
				pent					CHAR(10)		NOT NULL,
				WP					TINYINT			NOT NULL, -- indicator for ever a WP for the pent
				fte_employee_av				FLOAT			NULL,	-- employee ftes
				fte_WP_av				FLOAT			NULL,	-- working proprietor pseudo ftes
				mnths_wkd				TINYINT			NULL,
				mnths_wkd_ft				TINYINT			NULL,		-- months worked with fte=1 (not imputed)
				mnths_wkd_pt				TINYINT			NULL,		-- months wkd with fte<1 (not imputed)
				m_01					TINYINT			NOT NULL, -- dummy for worked in Jan (as employee or WP)
				m_02					TINYINT			NOT NULL,
				m_03					TINYINT			NOT NULL,
				m_04					TINYINT			NOT NULL,
				m_05					TINYINT			NOT NULL,
				m_06					TINYINT			NOT NULL,
				m_07					TINYINT			NOT NULL,
				m_08					TINYINT			NOT NULL,
				m_09					TINYINT			NOT NULL,
				m_10					TINYINT			NOT NULL,
				m_11					TINYINT			NOT NULL,
				m_12					TINYINT			NOT NULL,
				birth_yr				SMALLINT		NULL,
				birth_mon				TINYINT			NULL,
				sex					CHAR(1)			NULL,
				prev_financial_yr			SMALLINT		NOT NULL
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,financial_year,pent))

INSERT INTO #emp_emp_yr(snz_uid, snz_ird_uid, financial_year, gross_earn_yr,
	pent, WP, fte_employee_av, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	birth_yr, birth_mon, sex, prev_financial_yr)

SELECT DISTINCT 
	snz_uid,
	snz_ird_uid,
	financial_year,
	gross_earn_yr = SUM(ir_ems_gross_earnings_amt),
	pent,
	WP = MAX(WP),
	fte_employee_av = SUM(ISNULL(fte_employee,1000))/12, -- replace missings with 1000 so can extract months with missing from average
	fte_WP_av = SUM(fte_WP)/12,
	mnths_wkd = COUNT(*),
	mnths_wkd_ft = SUM(CASE WHEN fte_employee >= 0.99 and fte_employee<=1.01 THEN 1 ELSE 0 END),
	mnths_wkd_pt = SUM(CASE WHEN fte_employee < 0.99 THEN 1 ELSE 0 END),
	m_01 = MAX(CASE WHEN calendar_month = 1 THEN 1 ELSE 0 END),
	m_02 = MAX(CASE WHEN calendar_month = 2 THEN 1 ELSE 0 END),
	m_03 = MAX(CASE WHEN calendar_month = 3 THEN 1 ELSE 0 END),
	m_04 = MAX(CASE WHEN calendar_month = 4 THEN 1 ELSE 0 END),
	m_05 = MAX(CASE WHEN calendar_month = 5 THEN 1 ELSE 0 END),
	m_06 = MAX(CASE WHEN calendar_month = 6 THEN 1 ELSE 0 END),
	m_07 = MAX(CASE WHEN calendar_month = 7 THEN 1 ELSE 0 END),
	m_08 = MAX(CASE WHEN calendar_month = 8 THEN 1 ELSE 0 END),
	m_09 = MAX(CASE WHEN calendar_month = 9 THEN 1 ELSE 0 END),
	m_10 = MAX(CASE WHEN calendar_month = 10 THEN 1 ELSE 0 END),
	m_11 = MAX(CASE WHEN calendar_month = 11 THEN 1 ELSE 0 END),
	m_12 = MAX(CASE WHEN calendar_month = 12 THEN 1 ELSE 0 END),
	birth_yr = MAX(birth_yr),
	birth_mon = MAX(birth_mon),
	sex = MAX(sex) ,
	prev_financial_yr = financial_year - 1
FROM #emp_emp_mon2
GROUP BY snz_uid, snz_ird_uid, financial_year, pent

-- 65,441,XXX rows





-6-------------------------------------------------------------------------------------------------------------
-- Part B: Collapsing to one observation per person-pent-dim_year_key for pents with non-March balance months only

CREATE TABLE #emp_emp_yrb(	snz_uid				INT		NOT NULL,
				snz_ird_uid			INT		NOT NULL,
				dim_year_key			INT		NOT NULL,
				bal_mon				TINYINT		NOT NULL,
				gross_earn_yr			DECIMAL(13,2)	NOT NULL,
				pent				CHAR(10)	NOT NULL,
				WP				TINYINT		NOT NULL, -- indicator for ever a WP for the pent
				fte_employee_av			FLOAT		NULL,	-- employee ftes
				fte_WP_av			FLOAT		NULL,	-- working proprietor pseudo ftes
				mnths_wkd			TINYINT		NULL,
				mnths_wkd_ft			TINYINT		NULL,
				mnths_wkd_pt			TINYINT		NULL,
				m_01				TINYINT		NOT NULL, -- dummy for worked in Jan (as employee or WP)
				m_02				TINYINT		NOT NULL,
				m_03				TINYINT		NOT NULL,
				m_04				TINYINT		NOT NULL,
				m_05				TINYINT		NOT NULL,
				m_06				TINYINT		NOT NULL,
				m_07				TINYINT		NOT NULL,
				m_08				TINYINT		NOT NULL,
				m_09				TINYINT		NOT NULL,
				m_10				TINYINT		NOT NULL,
				m_11				TINYINT		NOT NULL,
				m_12				TINYINT		NOT NULL,
				birth_yr			SMALLINT	NULL,
				birth_mon			TINYINT		NULL,
				sex				CHAR(1)		NULL,
				prev_dim_year_key		INT		NOT NULL
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,dim_year_key,pent))

INSERT INTO #emp_emp_yrb(snz_uid, snz_ird_uid, dim_year_key, bal_mon, gross_earn_yr,
	pent, WP, fte_employee_av, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	birth_yr, birth_mon, sex, prev_dim_year_key)

SELECT DISTINCT 
	snz_uid,
	snz_ird_uid,
	dim_year_key,
	bal_mon,
	gross_earn_yr = SUM(ir_ems_gross_earnings_amt),
	pent,
	WP = MAX(WP),
	fte_employee_av = SUM(ISNULL(fte_employee,1000))/12, -- replace missings with 1000 so can extract months with missing from average
	fte_WP_av = SUM(fte_WP)/12,
	mnths_wkd = COUNT(*),
	mnths_wkd_ft = SUM(CASE WHEN fte_employee >= 0.99 and fte_employee<=1.01 THEN 1 ELSE 0 END),
	mnths_wkd_pt = SUM(CASE WHEN fte_employee < 0.99 THEN 1 ELSE 0 END),
	m_01 = MAX(CASE WHEN calendar_month = 1 THEN 1 ELSE 0 END),
	m_02 = MAX(CASE WHEN calendar_month = 2 THEN 1 ELSE 0 END),
	m_03 = MAX(CASE WHEN calendar_month = 3 THEN 1 ELSE 0 END),
	m_04 = MAX(CASE WHEN calendar_month = 4 THEN 1 ELSE 0 END),
	m_05 = MAX(CASE WHEN calendar_month = 5 THEN 1 ELSE 0 END),
	m_06 = MAX(CASE WHEN calendar_month = 6 THEN 1 ELSE 0 END),
	m_07 = MAX(CASE WHEN calendar_month = 7 THEN 1 ELSE 0 END),
	m_08 = MAX(CASE WHEN calendar_month = 8 THEN 1 ELSE 0 END),
	m_09 = MAX(CASE WHEN calendar_month = 9 THEN 1 ELSE 0 END),
	m_10 = MAX(CASE WHEN calendar_month = 10 THEN 1 ELSE 0 END),
	m_11 = MAX(CASE WHEN calendar_month = 11 THEN 1 ELSE 0 END),
	m_12 = MAX(CASE WHEN calendar_month = 12 THEN 1 ELSE 0 END),
	birth_yr = MAX(birth_yr),
	birth_mon = MAX(birth_mon),
	sex = MAX(sex) ,
	prev_dim_year_key = dim_year_key - 100
FROM #emp_emp_mon2
WHERE bal_mon!=3
GROUP BY snz_uid, snz_ird_uid, dim_year_key, pent, bal_mon


-- 29,947,XXX rows 


drop table #emp_emp_mon2




-7-------------------------------------------------------------------------------------------------------------
--- Part A: matching #emp_emp_yr back on itself to create an indicator for worked for same pent in previous financial year, all pents
--- One observation per person-pent-financial year

CREATE TABLE #emp_emp_yr2(	snz_uid				INT			NOT NULL,
				snz_ird_uid			INT			NOT NULL,
				financial_year			SMALLINT		NOT NULL,
				gross_earn_yr			DECIMAL(13,2)		NOT NULL,
				pent				CHAR(10)		NOT NULL,
				WP				TINYINT			NOT NULL, -- indicator for ever a WP for the pent
				fte_employee_av			FLOAT			NULL,	-- employee ftes
				fte_WP_av			FLOAT			NULL,	-- working proprietor pseudo ftes
				mnths_wkd			TINYINT			NULL,
				mnths_wkd_ft			TINYINT			NULL,
				mnths_wkd_pt			TINYINT			NULL,
				m_01				TINYINT			NOT NULL, -- dummy for worked in Jan (as employee or WP)
				m_02				TINYINT			NOT NULL,
				m_03				TINYINT			NOT NULL,
				m_04				TINYINT			NOT NULL,
				m_05				TINYINT			NOT NULL,
				m_06				TINYINT			NOT NULL,
				m_07				TINYINT			NOT NULL,
				m_08				TINYINT			NOT NULL,
				m_09				TINYINT			NOT NULL,
				m_10				TINYINT			NOT NULL,
				m_11				TINYINT			NOT NULL,
				m_12				TINYINT			NOT NULL,
				min_age				FLOAT			NULL, -- age in fractions of years in first month of financial year (April prev cal yr)
				sex				CHAR(1)			NULL,
				wkd_p_last_yr			TINYINT			NOT NULL -- dummy for worked for same pent last financal year
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,financial_year,pent))

INSERT INTO #emp_emp_yr2(snz_uid, snz_ird_uid, financial_year, gross_earn_yr,
	pent, WP, fte_employee_av, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, wkd_p_last_yr)

SELECT 
	EMP.snz_uid, EMP.snz_ird_uid, EMP.financial_year, EMP.gross_earn_yr,
	EMP.pent, EMP.WP, EMP.fte_employee_av, EMP.fte_WP_av, EMP.mnths_wkd, EMP.mnths_wkd_ft, EMP.mnths_wkd_pt, EMP.m_01, EMP.m_02, EMP.m_03, EMP.m_04, EMP.m_05, EMP.m_06, 
	EMP.m_07, EMP.m_08, EMP.m_09, EMP.m_10, EMP.m_11, EMP.m_12,
	min_age = CASE WHEN ((12.0*EMP.financial_year - 8) - (12*EMP.birth_yr + EMP.birth_mon))/12>=10 
		AND  ((12.0*EMP.financial_year - 8) - (12*EMP.birth_yr + EMP.birth_mon))/12<=85 THEN  ((12.0*EMP.financial_year - 8) - (12*EMP.birth_yr + EMP.birth_mon))/12
		ELSE NULL END,  
	EMP.sex, 
	wkd_p_last_yr = CASE WHEN LAST.snz_uid IS NOT NULL THEN 1 ELSE 0 END
FROM #emp_emp_yr EMP
LEFT JOIN #emp_emp_yr LAST
	ON EMP.snz_uid = LAST.snz_uid
	AND EMP.pent = LAST.pent
	AND EMP.prev_financial_yr = LAST.financial_year


-- 65,441,XXX rows






-8-------------------------------------------------------------------------------------------------------------
--- Part B: matching # emp_emp_yrb back on itself to create an indicator for worked for same pent in previous dim_year_key, pents with non-March financial yrs only
--- One observation per person-pent-dim_year_key year

CREATE TABLE #emp_emp_yr2b(	snz_uid				INT			NOT NULL,
				snz_ird_uid			INT			NOT NULL,
				dim_year_key			INT			NOT NULL,
				bal_mon				TINYINT			NOT NULL,
				gross_earn_yr			DECIMAL(13,2)		NOT NULL,
				pent				CHAR(10)		NOT NULL,
				WP				TINYINT			NOT NULL, -- indicator for ever a WP for the pent
				fte_employee_av			FLOAT			NULL,	-- employee ftes (average over 12 months)
				fte_WP_av			FLOAT			NULL,	-- working proprietor pseudo ftes
				mnths_wkd			TINYINT			NULL,
				mnths_wkd_ft			TINYINT			NULL,
				mnths_wkd_pt			TINYINT			NULL,
				m_01				TINYINT			NOT NULL, -- dummy for worked in Jan (as employee or WP)
				m_02				TINYINT			NOT NULL,
				m_03				TINYINT			NOT NULL,
				m_04				TINYINT			NOT NULL,
				m_05				TINYINT			NOT NULL,
				m_06				TINYINT			NOT NULL,
				m_07				TINYINT			NOT NULL,
				m_08				TINYINT			NOT NULL,
				m_09				TINYINT			NOT NULL,
				m_10				TINYINT			NOT NULL,
				m_11				TINYINT			NOT NULL,
				m_12				TINYINT			NOT NULL,
				min_age				FLOAT			NULL, -- age in fractions of years in first month of dim_year_key year
				sex				CHAR(1)			NULL,
				wkd_p_last_yr			TINYINT			NOT NULL -- dummy for worked for same pent last financal year
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,dim_year_key,pent))

INSERT INTO #emp_emp_yr2b(snz_uid, snz_ird_uid, dim_year_key, bal_mon, gross_earn_yr,
	pent, WP, fte_employee_av, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, wkd_p_last_yr)

SELECT 
	EMP.snz_uid, EMP.snz_ird_uid, EMP.dim_year_key, EMP.bal_mon, EMP.gross_earn_yr,
	EMP.pent, EMP.WP, EMP.fte_employee_av, EMP.fte_WP_av, EMP.mnths_wkd, EMP.mnths_wkd_ft, EMP.mnths_wkd_pt, EMP.m_01, EMP.m_02, EMP.m_03, EMP.m_04, EMP.m_05, EMP.m_06, 
	EMP.m_07, EMP.m_08, EMP.m_09, EMP.m_10, EMP.m_11, EMP.m_12,
	min_age = CASE WHEN EMP.bal_mon <=9 
		THEN ((12.0*FLOOR(EMP.dim_year_key/100) - 11 + EMP.bal_mon) - (12*EMP.birth_yr + EMP.birth_mon))/12
		WHEN EMP.bal_mon > 9 THEN ((12.0*FLOOR(EMP.dim_year_key/100) - 23 + EMP.bal_mon) - (12*EMP.birth_yr + EMP.birth_mon))/12
		ELSE NULL END,  
	EMP.sex, 
	wkd_p_last_yr = CASE WHEN LAST.snz_uid IS NOT NULL THEN 1 ELSE 0 END
FROM #emp_emp_yrb EMP
LEFT JOIN #emp_emp_yrb LAST
	ON EMP.snz_uid = LAST.snz_uid
	AND EMP.pent = LAST.pent
	AND EMP.prev_dim_year_key = LAST.dim_year_key

UPDATE #emp_emp_yr2b
SET min_age = NULL WHERE min_age<10 OR min_age>85


-- 29,947,XXX rows












-9-------------------------------------------------------------------------------------------------------------
-- collapsing Part A versions to one observation per person-financial year, combining obs from different employers, all firms
-- dropping employer data on highest paying job to be re-merged in


CREATE TABLE #emp_yr(	snz_uid					INT			NOT NULL,
			snz_ird_uid				INT			NOT NULL,
			financial_year				SMALLINT		NOT NULL,
			tot_gross_earn_yr			DECIMAL(13,2)		NOT NULL,
			max_gross_earn_yr			DECIMAL(13,2)		NOT NULL,
			WP					TINYINT			NOT NULL,	-- indicator for ever a WP for ANY pent
			tot_fte_employee_av			FLOAT			NULL,		-- average monthly employee ftes (over 12 months)
			tot_fte_WP_av				FLOAT			NULL,		-- average monthly working proprietor pseudo ftes
			tot_mon_wkd				TINYINT			NULL,		-- months in which worked at any pent
			num_pents				INT			NULL,		-- number of distinct pents worked at
			min_age					FLOAT			NULL,
			female					TINYINT			NULL
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,financial_year))

INSERT INTO #emp_yr(snz_uid, snz_ird_uid, financial_year, tot_gross_earn_yr, max_gross_earn_yr,
	WP, tot_fte_employee_av, tot_fte_WP_av, tot_mon_wkd, num_pents,
	min_age,  female)

SELECT  
	snz_uid, 
	snz_ird_uid, 
	financial_year, 
	tot_gross_earn_yr = SUM(gross_earn_yr), -- gross employee earnings from all pents
	max_gross_earn_yr = MAX(gross_earn_yr), -- gross earning from highest-paying pent in year
	WP = MAX(WP), -- ever WP in any pent
	tot_fte_employee_av = SUM(fte_employee_av),
	tot_fte_WP_av = SUM(fte_WP_av), 
	tot_mon_wkd = MAX(m_01) + MAX(m_02) + MAX(m_03) + MAX(m_04) + MAX(m_05) + MAX(m_06) + MAX(m_07) + MAX(m_08) + MAX(m_09)
		+ MAX(m_10) + MAX(m_11) + MAX(m_12), -- number of calendar months worked in
	num_pents = COUNT(*), -- number of different employers in year
	min_age = MIN(min_age), -- age at start of financial year
	female = MAX(CASE WHEN sex = 'F' THEN 1 ELSE 0 END) -- ever marked as female
FROM #emp_emp_yr2 
GROUP BY snz_uid, snz_ird_uid, financial_year


-- 43,881,XXX rows 







-10-------------------------------------------------------------------------------------------------------------
-- merging highest paying pent ID back in to data with one observation per person-financial year

CREATE TABLE #emp_yr2(	snz_uid				INT			NOT NULL,
			snz_ird_uid			INT			NOT NULL,
			financial_year			SMALLINT		NOT NULL,
			tot_gross_earn_yr		DECIMAL(13,2)		NOT NULL,	-- total w/s earnings from all pents in yr
			max_gross_earn_yr		DECIMAL(13,2)		NOT NULL,	-- total w/s salary earnings from highest paying pent in yr
			WP				TINYINT			NOT NULL,	-- indicator for ever a WP for any pent
			tot_fte_employee_av		FLOAT			NULL,		-- average monthly employee ftes
			max_fte_employee_av		FLOAT			NULL,		-- average monthly employee ftes (over 12 mon) associated with highest paying pent
			tot_fte_WP_av			FLOAT			NULL,		-- average monthly working proprietor pseudo ftes
			tot_mon_wkd			TINYINT			NULL,		-- months in which worked at any pent
			max_mon_wkd			TINYINT			NULL,		-- months working at highest paying pent
			max_mon_wkd_ft			TINYINT			NULL,		-- months working full time (1 FTE) at highest paying pent
			max_mon_wkd_pt			TINYINT			NULL,		-- months working part time (<1 FTE) at highest paying pent
			num_pents			INT			NULL,		-- number of distinct pents worked at in year
			min_age				FLOAT			NULL,		-- age at start of financial year
			female				TINYINT			NULL,		-- indicator for ever female
			hp_pent				CHAR(10)		NOT NULL	-- id of pent that paid most w/s in year
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,financial_year))

INSERT INTO #emp_yr2(snz_uid, snz_ird_uid, financial_year, tot_gross_earn_yr, max_gross_earn_yr,
	WP, tot_fte_employee_av, max_fte_employee_av, tot_fte_WP_av, tot_mon_wkd, max_mon_wkd, max_mon_wkd_ft, max_mon_wkd_pt, num_pents,
	min_age, female, hp_pent)

SELECT DISTINCT
	EMP.snz_uid, 
	EMP.snz_ird_uid, 
	EMP.financial_year, 
	tot_gross_earn_yr, 
	EMP.max_gross_earn_yr,
	EMP.WP, 
	tot_fte_employee_av, 
	max_fte_employee_av = MAX(PENT.fte_employee_av), -- maximum average ftes at a pent that paid the equal most
	tot_fte_WP_av, 
	tot_mon_wkd, 
	max_mon_wkd = MAX(PENT.mnths_wkd), -- max number of months worked at a pent that paid the most - not necessarily the hp_pent if max
						-- amount paid was exactly the same for two pents, but number of months was different (rare)
	max_mon_wkd_ft = MAX(PENT.mnths_wkd_ft), -- number of months wkd full time (1 FTE) at highest paying pent
	max_mon_wkd_pt = MAX(PENT.mnths_wkd_pt), -- numberof months wkd part time (<1 FTE) at highest paying pent
	num_pents,
	EMP.min_age, 
	female, 
	hp_pent = MAX(PENT.pent)
FROM #emp_yr EMP
LEFT JOIN #emp_emp_yr2 PENT
	ON EMP.snz_uid = PENT.snz_uid AND EMP.financial_year = PENT.financial_year AND EMP.max_gross_earn_yr = PENT.gross_earn_yr
GROUP BY EMP.snz_uid, EMP.snz_ird_uid, EMP.financial_year, tot_gross_earn_yr, EMP.max_gross_earn_yr, EMP.WP, 
	tot_fte_employee_av, tot_fte_WP_av, tot_mon_wkd, num_pents, EMP.min_age, female


-- 43,881,XXX rows
	






-11-------------------------------------------------------------------------------------------------------------
-- merging in whether worked for highest paying pent previous year, one observation per person-financial year



CREATE TABLE #emp_yr3(	snz_uid				INT			NOT NULL,
			snz_ird_uid			INT			NOT NULL,
			financial_year			SMALLINT		NOT NULL,
			tot_gross_earn_yr		DECIMAL(13,2)		NOT NULL,	-- total w/s earnings from all pents in yr
			max_gross_earn_yr		DECIMAL(13,2)		NOT NULL,	-- total w/s salary earnings from highest paying pent in yr
			WP				TINYINT			NOT NULL,	-- indicator for ever a WP for any pent
			tot_fte_employee_av		FLOAT			NULL,		-- average monthly employee ftes
			max_fte_employee_av		FLOAT			NULL,		-- average monthly employee ftes (over 12 mon) associated with highest paying pent
			tot_fte_WP_av			FLOAT			NULL,		-- average monthly working proprietor pseudo ftes
			tot_mon_wkd			TINYINT			NULL,		-- months in which worked at any pent
			max_mon_wkd			TINYINT			NULL,		-- monhts working at highest paying pent
			max_mon_wkd_ft			TINYINT			NULL,		-- monhts working full time (1 FTE) at highest paying pent
			max_mon_wkd_pt			TINYINT			NULL,		-- monhts working part time (<1 FTE) at highest paying pent
			num_pents			INT			NULL,		-- number of distinct pents worked at in year
			min_age				FLOAT			NULL,		-- age at start of financial year
			female				TINYINT			NULL,		-- indicator for ever female
			hp_pent				CHAR(10)		NOT NULL,	-- id of pent that paid most w/s in year
			wkd_hpp_1ya			TINYINT			NULL,		-- indicator for worked for highest paying pent prev yr
			last_yr				SMALLINT		NOT NULL
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,financial_year))

INSERT INTO #emp_yr3(snz_uid, snz_ird_uid, financial_year, tot_gross_earn_yr, max_gross_earn_yr,
	WP, tot_fte_employee_av, max_fte_employee_av, tot_fte_WP_av, tot_mon_wkd, max_mon_wkd, max_mon_wkd_ft, max_mon_wkd_pt, num_pents,
	min_age, female, hp_pent, wkd_hpp_1ya, last_yr)

SELECT
	EMP.snz_uid, 
	EMP.snz_ird_uid, 
	EMP.financial_year, 
	tot_gross_earn_yr, 
	EMP.max_gross_earn_yr,
	EMP.WP, 
	tot_fte_employee_av,
	max_fte_employee_av, 
	tot_fte_WP_av, 
	tot_mon_wkd, 
	max_mon_wkd,						
	max_mon_wkd_ft,
	max_mon_wkd_pt,
	num_pents,
	EMP.min_age, 
	female, 
	hp_pent,
	wkd_hpp_1ya = wkd_p_last_yr,
	last_yr = EMP.financial_year - 1
FROM #emp_yr2 EMP
LEFT JOIN #emp_emp_yr2 LYR
	ON EMP.snz_uid = LYR.snz_uid AND EMP.financial_year = LYR.financial_year AND EMP.hp_pent = LYR.pent






-12-------------------------------------------------------------------------------------------------------------
-- merging in indicator for worked at highest paying pent two years ago into employee-year data
-- and also merging in worker FE. Note match is on snz_uid, so version of IDI must match

-- save as Wgap_v3_emp_yr4.csv 


SELECT 
	EMP.snz_uid, 
	EMP.snz_ird_uid, 
	EMP.financial_year, 
	tot_gross_earn_yr, 
	EMP.max_gross_earn_yr,
	EMP.WP, 
	tot_fte_employee_av, 
	max_fte_employee_av,
	tot_fte_WP_av, 
	tot_mon_wkd, 
	max_mon_wkd,						
	max_mon_wkd_ft,						
	max_mon_wkd_pt,						
	num_pents,
	EMP.min_age, 
	female, 
	hp_pent,
	wkd_hpp_1ya,
	wkd_hpp_2ya = CASE WHEN wkd_p_last_yr = 1 THEN 1 ELSE 0 END

FROM #emp_yr3 EMP
LEFT JOIN #emp_emp_yr2 LYR
	ON EMP.snz_uid = LYR.snz_uid AND EMP.last_yr = LYR.financial_year AND EMP.hp_pent = LYR.pent
LEFT JOIN [IDI_Sandpit].[clean_read_IR].[emp_WFE_IDI_20181020_RFabling] WFE
	ON EMP.snz_uid = WFE.snz_uid
WHERE EMP.financial_year<=2018


-- 41,621,XXX rows 















--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
-- firm-year (to March inc) level data set for all employers
--------------------------------------------------------------------------------------------------------------



-13-------------------------------------------------------------------------------------------------------------
-- Part A: matching employer-employee-financial year level data set back on itself to get indicator for whether employed by the firm two years ago


CREATE TABLE #emp_emp_yr3(	snz_uid				INT		NOT NULL,
				snz_ird_uid			INT		NOT NULL,
				financial_year		 	SMALLINT	NOT NULL,
				gross_earn_yr			DECIMAL(13,2)	NOT NULL,
				pent				CHAR(10)	NOT NULL,
				WP				TINYINT		NOT NULL, -- indicator for ever a WP for the pent
				fte_employee_av			FLOAT		NULL,	-- employee ftes
				fte_employee_av_imp		FLOAT		NULL,	-- employee ftes, imputed to fraction of months worked if missing
				fte_WP_av			FLOAT		NULL,	-- working proprietor pseudo ftes
				mnths_wkd			TINYINT		NULL,
				mnths_wkd_ft			TINYINT		NULL,
				mnths_wkd_pt			TINYINT		NULL,
				m_01				TINYINT		NOT NULL, -- dummy for worked in Jan (as employee or WP)
				m_02				TINYINT		NOT NULL,
				m_03				TINYINT		NOT NULL,
				m_04				TINYINT		NOT NULL,
				m_05				TINYINT		NOT NULL,
				m_06				TINYINT		NOT NULL,
				m_07				TINYINT		NOT NULL,
				m_08				TINYINT		NOT NULL,
				m_09				TINYINT		NOT NULL,
				m_10				TINYINT		NOT NULL,
				m_11				TINYINT		NOT NULL,
				m_12				TINYINT		NOT NULL,
				min_age				FLOAT		NULL,
				sex				CHAR(1)		NULL,
				wkd_p_1ya			TINYINT		NOT NULL, -- dummy for worked for same pent last financal year
				wkd_p_2ya			TINYINT		NOT NULL -- dummy for worked for same pent two years ago
						
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,financial_year,pent))

INSERT INTO #emp_emp_yr3(snz_uid, snz_ird_uid, financial_year, gross_earn_yr,
	pent, WP, fte_employee_av, fte_employee_av_imp, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, wkd_p_1ya, wkd_p_2ya)

SELECT 
	EMP.snz_uid, EMP.snz_ird_uid, EMP.financial_year, EMP.gross_earn_yr,
	EMP.pent, EMP.WP, EMP.fte_employee_av, 
	fte_employee_av_imp = CASE WHEN EMP.fte_employee_av<=1 THEN EMP.fte_employee_av
		WHEN EMP.fte_employee_av>1 THEN (EMP.mnths_wkd/12.0) ELSE NULL END,
	EMP.fte_WP_av, EMP.mnths_wkd, EMP.mnths_wkd_ft, EMP.mnths_wkd_pt, EMP.m_01, EMP.m_02, EMP.m_03, EMP.m_04, EMP.m_05, EMP.m_06, 
	EMP.m_07, EMP.m_08, EMP.m_09, EMP.m_10, EMP.m_11, EMP.m_12,
	EMP.min_age, EMP.sex, 
	wkd_p_1ya = EMP.wkd_p_last_yr,
	wkd_p_2ya = ISNULL(LAST.wkd_p_last_yr,0) 
FROM #emp_emp_yr2 EMP
LEFT JOIN #emp_emp_yr2 LAST
	ON EMP.snz_uid = LAST.snz_uid
	AND EMP.pent = LAST.pent
	AND EMP.financial_year - 1 = LAST.financial_year


-- 65,441,XXX rows 









-14-------------------------------------------------------------------------------------------------------------

-- Part B: matching employer-employee-dim_year_key level data set back on itself to get indicator for whether employed by the firm two years ago
-- for pents with non-March financial years only


CREATE TABLE #emp_emp_yr3b(	snz_uid				INT			NOT NULL,
				snz_ird_uid			INT			NOT NULL,
				dim_year_key			INT			NOT NULL,
				gross_earn_yr			DECIMAL(13,2)		NOT NULL,
				pent				CHAR(10)		NOT NULL,
				WP				TINYINT			NOT NULL, -- indicator for ever a WP for the pent
				fte_employee_av			FLOAT			NULL,	-- employee ftes
				fte_employee_av_imp		FLOAT			NULL,	-- employee ftes, imputed to fraction of months worked if missing
				fte_WP_av			FLOAT			NULL,	-- working proprietor pseudo ftes
				mnths_wkd			TINYINT			NULL,
				mnths_wkd_ft			TINYINT			NULL,
				mnths_wkd_pt			TINYINT			NULL,
				m_01				TINYINT			NOT NULL, -- dummy for worked in Jan (as employee or WP)
				m_02				TINYINT			NOT NULL,
				m_03				TINYINT			NOT NULL,
				m_04				TINYINT			NOT NULL,
				m_05				TINYINT			NOT NULL,
				m_06				TINYINT			NOT NULL,
				m_07				TINYINT			NOT NULL,
				m_08				TINYINT			NOT NULL,
				m_09				TINYINT			NOT NULL,
				m_10				TINYINT			NOT NULL,
				m_11				TINYINT			NOT NULL,
				m_12				TINYINT			NOT NULL,
				min_age				FLOAT			NULL,
				sex				CHAR(1)			NULL,
				wkd_p_1ya			TINYINT			NOT NULL, -- dummy for worked for same pent last financal year
				wkd_p_2ya			TINYINT			NOT NULL -- dummy for worked for same pent two years ago
						
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,dim_year_key,pent))

INSERT INTO #emp_emp_yr3b(snz_uid, snz_ird_uid, dim_year_key, gross_earn_yr,
	pent, WP, fte_employee_av, fte_employee_av_imp, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, wkd_p_1ya, wkd_p_2ya)

SELECT 
	EMP.snz_uid, EMP.snz_ird_uid, EMP.dim_year_key, EMP.gross_earn_yr,
	EMP.pent, EMP.WP, EMP.fte_employee_av, 
	fte_employee_av_imp = CASE WHEN EMP.fte_employee_av<=1 THEN EMP.fte_employee_av
		WHEN EMP.fte_employee_av>1 THEN (EMP.mnths_wkd/12.0) ELSE NULL END,
	EMP.fte_WP_av, EMP.mnths_wkd, EMP.mnths_wkd_ft, EMP.mnths_wkd_pt, EMP.m_01, EMP.m_02, EMP.m_03, EMP.m_04, EMP.m_05, EMP.m_06, 
	EMP.m_07, EMP.m_08, EMP.m_09, EMP.m_10, EMP.m_11, EMP.m_12,
	EMP.min_age, EMP.sex, 
	wkd_p_1ya = EMP.wkd_p_last_yr,
	wkd_p_2ya = ISNULL(LAST.wkd_p_last_yr,0) 
FROM #emp_emp_yr2b EMP
LEFT JOIN #emp_emp_yr2b LAST
	ON EMP.snz_uid = LAST.snz_uid
	AND EMP.pent = LAST.pent
	AND EMP.dim_year_key - 1 = LAST.dim_year_key



drop table #emp_emp_yr2
drop table #emp_emp_yr2b
drop table #emp_emp_yr
drop table #emp_emp_yrb




-15-------------------------------------------------------------------------------------------------------------
--- Creating one table that appends #emp_emp_yr3b onto #emp_emp_yr3, treating financial_year (all firms) as the same variable as dim_year_key (non-March firms only) and calling it year


CREATE TABLE #emp_emp_yr3_merge(	snz_uid				INT			NOT NULL,
					snz_ird_uid			INT			NOT NULL,
					year				INT			NOT NULL,	-- March year where 4-digit, pent's financial year when 6-digit 
					gross_earn_yr			DECIMAL(13,2)	NOT NULL,
					pent				CHAR(10)	NOT NULL,
					WP				TINYINT		NOT NULL, -- indicator for ever a WP for the pent
					fte_employee_av			FLOAT		NULL,	-- employee ftes
					fte_employee_av_imp		FLOAT		NULL,	-- employee ftes, imputed to fraction of months worked if missing
					fte_WP_av			FLOAT		NULL,	-- working proprietor pseudo ftes
					mnths_wkd			TINYINT		NULL,
					mnths_wkd_ft			TINYINT		NULL,
					mnths_wkd_pt			TINYINT		NULL,
					m_01				TINYINT		NOT NULL, -- dummy for worked in Jan (as employee or WP)
					m_02				TINYINT		NOT NULL,
					m_03				TINYINT		NOT NULL,
					m_04				TINYINT		NOT NULL,
					m_05				TINYINT		NOT NULL,
					m_06				TINYINT		NOT NULL,
					m_07				TINYINT		NOT NULL,
					m_08				TINYINT		NOT NULL,
					m_09				TINYINT		NOT NULL,
					m_10				TINYINT		NOT NULL,
					m_11				TINYINT		NOT NULL,
					m_12				TINYINT		NOT NULL,
					min_age				FLOAT		NULL,
					sex				CHAR(1)		NULL,
					wkd_p_1ya			TINYINT		NOT NULL, -- dummy for worked for same pent last financal year
					wkd_p_2ya			TINYINT		NOT NULL -- dummy for worked for same pent two years ago
						
	PRIMARY KEY CLUSTERED (snz_uid,snz_ird_uid,year,pent))

INSERT INTO #emp_emp_yr3_merge(snz_uid, snz_ird_uid, year, gross_earn_yr,
	pent, WP, fte_employee_av, fte_employee_av_imp, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, wkd_p_1ya, wkd_p_2ya)

SELECT 
	snz_uid, snz_ird_uid, 
	year = financial_year, 
	gross_earn_yr,
	pent, WP, fte_employee_av, 
	fte_employee_av_imp,
	fte_WP_av, mnths_wkd,  mnths_wkd_ft,  mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, 
	m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, 
	wkd_p_1ya,
	wkd_p_2ya
FROM #emp_emp_yr3 

INSERT INTO #emp_emp_yr3_merge(snz_uid, snz_ird_uid, year, gross_earn_yr,
	pent, WP, fte_employee_av, fte_employee_av_imp, fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, wkd_p_1ya, wkd_p_2ya)

SELECT 
	snz_uid, snz_ird_uid, 
	year = dim_year_key, 
	gross_earn_yr,
	pent, WP, fte_employee_av, 
	fte_employee_av_imp,
	fte_WP_av, mnths_wkd, mnths_wkd_ft, mnths_wkd_pt, m_01, m_02, m_03, m_04, m_05, m_06, 
	m_07, m_08, m_09, m_10, m_11, m_12,
	min_age, sex, 
	wkd_p_1ya,
	wkd_p_2ya 
FROM #emp_emp_yr3b 







-16-------------------------------------------------------------------------------------------------------------
-----------
-- table at the pent-dim_year_key level of number of WP who were not waged


CREATE TABLE #nonwgdWP(	pent					CHAR(10)	NOT NULL,
			dim_year_key				INT		NOT NULL,
			hcy_unwgdWP_t				TINYINT		NOT NULL, -- count of distinct WPs in the year who were not waged
			hcy_unwgdWP_m				TINYINT		NOT NULL, -- count of distinct male WPs in the year who were not waged
			hcy_unwgdWP_f				TINYINT		NOT NULL, -- count of distinct female WPs in the year who were not waged
	PRIMARY KEY CLUSTERED (pent, dim_year_key))

INSERT INTO #nonwgdWP(pent, dim_year_key, hcy_unwgdWP_t, hcy_unwgdWP_m, hcy_unwgdWP_f)

SELECT DISTINCT 
	WKP.pent,
	dim_year_key,
	hcy_unwgdWP_t = SUM(CASE WHEN WKP.nonwgdWP = 1 THEN 1 ELSE 0 END),
	hcy_unwgdWP_m = SUM(CASE WHEN WKP.nonwgdWP = 1 AND female = 0 THEN 1 ELSE 0 END),
	hcy_unwgdWP_f = SUM(CASE WHEN WKP.nonwgdWP = 1 AND female = 1 THEN 1 ELSE 0 END)
FROM (SELECT 
		pent,
		WP.snz_uid,
		dim_year_key,
		nonwgdWP = CASE WHEN has_nonwage_inc = 1 AND has_wage_inc = 0 THEN 1 ELSE 0 END,
		female = CASE WHEN snz_sex_code = 2 THEN 1 ELSE 0 END
	FROM IDI_Sandpit.[clean_read_IR].[pent_WP_yr_IDI_20181020_RFabling] WP
	LEFT JOIN [data].[personal_detail] SEX
	ON WP.snz_uid = SEX.snz_uid) WKP
GROUP BY pent, dim_year_key
HAVING SUM(CASE WHEN WKP.nonwgdWP = 1 THEN 1 ELSE 0 END)>0


INSERT INTO #nonwgdWP(pent, dim_year_key, hcy_unwgdWP_t, hcy_unwgdWP_m, hcy_unwgdWP_f)

SELECT DISTINCT 
	WKP.pent,
	dim_year_key = FLOOR(dim_year_key/100),
	hcy_unwgdWP_t = SUM(CASE WHEN WKP.nonwgdWP = 1 THEN 1 ELSE 0 END),
	hcy_unwgdWP_m = SUM(CASE WHEN WKP.nonwgdWP = 1 AND female = 0 THEN 1 ELSE 0 END),
	hcy_unwgdWP_f = SUM(CASE WHEN WKP.nonwgdWP = 1 AND female = 1 THEN 1 ELSE 0 END)
FROM (SELECT 
		pent,
		WP.snz_uid,
		dim_year_key,
		nonwgdWP = CASE WHEN has_nonwage_inc = 1 AND has_wage_inc = 0 THEN 1 ELSE 0 END,
		female = CASE WHEN snz_sex_code = 2 THEN 1 ELSE 0 END
	FROM IDI_Sandpit.[clean_read_IR].[pent_WP_yr_IDI_20181020_RFabling] WP
	LEFT JOIN [data].[personal_detail] SEX
	ON WP.snz_uid = SEX.snz_uid) WKP
GROUP BY pent, dim_year_key
HAVING SUM(CASE WHEN WKP.nonwgdWP = 1 THEN 1 ELSE 0 END)>0








-17-------------------------------------------------------------------------------------------------------------
-- Part I, a version (all firms)

-- save as csv file Wgap_v3_pent_yrA1.csv


SELECT DISTINCT  
	P.pent,
	year = financial_year, -- dim_year_key for b version
	
	months_empl = MAX(CASE WHEN WP = 0 THEN m_01 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_02 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_03 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_04 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_05 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_06 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_07 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_08 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_09 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_10 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_11 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_12 ELSE 0 END), -- number of months when pent paid a non-WP employee

	-- head count-months of waged WPs
	hc_wagedWP_t = SUM(CASE WHEN WP = 1 THEN mnths_wkd ELSE 0 END),
	hc_wagedWP_m = SUM(CASE WHEN WP = 1 AND sex = 'M' THEN mnths_wkd ELSE 0 END),
	hc_wagedWP_f = SUM(CASE WHEN WP = 1 AND sex = 'F' THEN mnths_wkd ELSE 0 END),

	-- head count-YEARS of waged WPs
	hcy_wagedWP_t = SUM(CASE WHEN WP = 1 AND mnths_wkd>0 THEN 1 ELSE 0 END),
	hcy_wagedWP_m = SUM(CASE WHEN WP = 1 AND mnths_wkd>0 AND sex = 'M' THEN 1 ELSE 0 END),
	hcy_wagedWP_f = SUM(CASE WHEN WP = 1 AND mnths_wkd>0 AND sex = 'F' THEN 1 ELSE 0 END),

	-- head count-YEARS of non-waged WPs
	hcy_unwgdWP_t = ISNULL(MAX(hcy_unwgdWP_t),0),
	hcy_unwgdWP_m = ISNULL(MAX(hcy_unwgdWP_m),0),
	hcy_unwgdWP_f = ISNULL(MAX(hcy_unwgdWP_f),0), 

	-- waged WP FTEs
	fte_wagedWP_t = SUM(CASE WHEN WP = 1 THEN fte_WP_av ELSE 0 END),
	fte_wagedWP_m = SUM(CASE WHEN WP = 1 AND sex = 'M' THEN fte_WP_av ELSE 0 END),
	fte_wagedWP_f = SUM(CASE WHEN WP = 1 AND sex = 'F' THEN fte_WP_av ELSE 0 END),

	-- wage/salary paid to WP FTEs
	ws_wagedWP_t = SUM(CASE WHEN WP = 1 THEN gross_earn_yr ELSE 0 END),
	ws_wagedWP_m = SUM(CASE WHEN WP = 1 AND sex = 'M' THEN gross_earn_yr ELSE 0 END),
	ws_wagedWP_f = SUM(CASE WHEN WP = 1 AND sex = 'F' THEN gross_earn_yr ELSE 0 END),


	-----------------------------------------
	-- head counts for non-WP employees

	-- total, male, and female head counts
	hc_t = SUM(CASE WHEN WP = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders
	hc_m = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders
	hc_f = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders

	-- total, male, and female head counts of full-time (1 FTE) workers
	hc_t_ft = SUM(CASE WHEN WP = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-fulltimemonths of non-WP employees, both genders
	hc_m_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN mnths_wkd_ft ELSE 0 END), -- headcount-fulltimemonths of non-WP employees, male
	hc_f_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN mnths_wkd_ft ELSE 0 END), -- headcount-fultimemonths of non-WP employees, female

	-- total, male, and female head counts of part-time (<1 FTE) workers
	hc_t_pt = SUM(CASE WHEN WP = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-parttimemonths of non-WP employees, both genders
	hc_m_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN mnths_wkd_pt ELSE 0 END), -- headcount-parttimemonths of non-WP employees, male
	hc_f_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN mnths_wkd_pt ELSE 0 END), -- headcount-parttimemonths of non-WP employees, female


	-----------------------------------------
	-- wage/salary paid

	-- total, male, and female wage/salary paid
	ws_t = SUM(CASE WHEN WP = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders
	ws_m = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders
	ws_f = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders


	-----------------------------------------
	-- FTEs worked

	-- total, male, and female FTEs worked
	fte_t = SUM(CASE WHEN WP = 0 THEN fte_employee_av_imp ELSE 0 END), -- annual FTEs of non-WP employees, both genders
	fte_m = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders
	fte_f = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN fte_employee_av_imp ELSE 0 END) -- headcount-months of non-WP employees, both genders


FROM #emp_emp_yr3 P
LEFT JOIN #nonwgdWP NWWP
	ON P.pent = NWWP.pent AND P.financial_year = NWWP.dim_year_key
GROUP BY P.pent, financial_year







-18-------------------------------------------------------------------------------------------------------------
-- Part I, b version
-- for firms with non-March financial years only

-- save as csv file Wgap_v3_pent_yrA2.csv


SELECT DISTINCT  
	P.pent,
	year = P.dim_year_key, 

	months_empl = MAX(CASE WHEN WP = 0 THEN m_01 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_02 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_03 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_04 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_05 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_06 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_07 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_08 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_09 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_10 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_11 ELSE 0 END)
		+ MAX(CASE WHEN WP = 0 THEN m_12 ELSE 0 END), -- number of months when pent paid a non-WP employee

	-- head count-months of waged WPs
	hc_wagedWP_t = SUM(CASE WHEN WP = 1 THEN mnths_wkd ELSE 0 END),
	hc_wagedWP_m = SUM(CASE WHEN WP = 1 AND sex = 'M' THEN mnths_wkd ELSE 0 END),
	hc_wagedWP_f = SUM(CASE WHEN WP = 1 AND sex = 'F' THEN mnths_wkd ELSE 0 END),

	-- head count-YEARS of waged WPs
	hcy_wagedWP_t = SUM(CASE WHEN WP = 1 AND mnths_wkd>0 THEN 1 ELSE 0 END),
	hcy_wagedWP_m = SUM(CASE WHEN WP = 1 AND mnths_wkd>0 AND sex = 'M' THEN 1 ELSE 0 END),
	hcy_wagedWP_f = SUM(CASE WHEN WP = 1 AND mnths_wkd>0 AND sex = 'F' THEN 1 ELSE 0 END),

	-- head count-YEARS of non-waged WPs
	hcy_unwgdWP_t = ISNULL(MAX(hcy_unwgdWP_t),0),
	hcy_unwgdWP_m = ISNULL(MAX(hcy_unwgdWP_m),0),
	hcy_unwgdWP_f = ISNULL(MAX(hcy_unwgdWP_f),0), 

	-- waged WP FTEs
	fte_wagedWP_t = SUM(CASE WHEN WP = 1 THEN fte_WP_av ELSE 0 END),
	fte_wagedWP_m = SUM(CASE WHEN WP = 1 AND sex = 'M' THEN fte_WP_av ELSE 0 END),
	fte_wagedWP_f = SUM(CASE WHEN WP = 1 AND sex = 'F' THEN fte_WP_av ELSE 0 END),

	-- wage/salary paid to WP FTEs
	ws_wagedWP_t = SUM(CASE WHEN WP = 1 THEN gross_earn_yr ELSE 0 END),
	ws_wagedWP_m = SUM(CASE WHEN WP = 1 AND sex = 'M' THEN gross_earn_yr ELSE 0 END),
	ws_wagedWP_f = SUM(CASE WHEN WP = 1 AND sex = 'F' THEN gross_earn_yr ELSE 0 END),


	-----------------------------------------
	-- head counts for non-WP employees

	-- total, male, and female head counts
	hc_t = SUM(CASE WHEN WP = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders
	hc_m = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders
	hc_f = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders

	-- total, male, and female head counts of full-time (1 FTE) workers
	hc_t_ft = SUM(CASE WHEN WP = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-fulltimemonths of non-WP employees, both genders
	hc_m_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN mnths_wkd_ft ELSE 0 END), -- headcount-fulltimemonths of non-WP employees, male
	hc_f_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN mnths_wkd_ft ELSE 0 END), -- headcount-fultimemonths of non-WP employees, female

	-- total, male, and female head counts of part-time (<1 FTE) workers
	hc_t_pt = SUM(CASE WHEN WP = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-parttimemonths of non-WP employees, both genders
	hc_m_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN mnths_wkd_pt ELSE 0 END), -- headcount-parttimemonths of non-WP employees, male
	hc_f_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN mnths_wkd_pt ELSE 0 END), -- headcount-parttimemonths of non-WP employees, female


	-----------------------------------------
	-- wage/salary paid

	-- total, male, and female wage/salary paid
	ws_t = SUM(CASE WHEN WP = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders
	ws_m = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders
	ws_f = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders


	-----------------------------------------
	-- FTEs worked

	-- total, male, and female FTEs worked
	fte_t = SUM(CASE WHEN WP = 0 THEN fte_employee_av_imp ELSE 0 END), -- annual FTEs of non-WP employees, both genders
	fte_m = SUM(CASE WHEN WP = 0 AND sex = 'M' THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders
	fte_f = SUM(CASE WHEN WP = 0 AND sex = 'F' THEN fte_employee_av_imp ELSE 0 END) -- headcount-months of non-WP employees, both genders


FROM #emp_emp_yr3b P
LEFT JOIN #nonwgdWP NWWP
	ON P.pent = NWWP.pent AND P.dim_year_key = NWWP.dim_year_key
GROUP BY P.pent, P.dim_year_key







-19-------------------------------------------------------------------------------------------------------------
-- Part II: age, a version (all firms)

-- save as csv file Wgap_v3_pent_yr_age1.csv

SELECT DISTINCT  
	P.pent,
	year = financial_year,

	-----------------------------------------
	-- head counts for non-WP employees

	-- total, male, and female head counts by age category
	hc_t_lt25 = SUM(CASE WHEN WP = 0 AND min_age<25 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	hc_m_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	hc_f_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	hc_t_25to39 = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	hc_m_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	hc_f_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	hc_t_40to54 = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	hc_m_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	hc_f_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	hc_t_55to64 = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	hc_m_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	hc_f_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	hc_t_65p = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	hc_m_65p = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	hc_f_65p = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female head counts by age category of full-time (1 FTE) workers
	hc_t_lt25_ft = SUM(CASE WHEN WP = 0 AND min_age<25 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	hc_m_lt25_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	hc_f_lt25_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	hc_t_25to39_ft = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	hc_m_25to39_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	hc_f_25to39_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	hc_t_40to54_ft = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	hc_m_40to54_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	hc_f_40to54_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	hc_t_55to64_ft = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	hc_m_55to64_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	hc_f_55to64_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	hc_t_65p_ft = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	hc_m_65p_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	hc_f_65p_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female head counts by age category of part-time (<1 FTE) workers
	hc_t_lt25_pt = SUM(CASE WHEN WP = 0 AND min_age<25 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	hc_m_lt25_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	hc_f_lt25_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	hc_t_25to39_pt = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	hc_m_25to39_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	hc_f_25to39_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	hc_t_40to54_pt = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	hc_m_40to54_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	hc_f_40to54_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	hc_t_55to64_pt = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	hc_m_55to64_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	hc_f_55to64_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	hc_t_65p_pt = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	hc_m_65p_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	hc_f_65p_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female wage/salary paid by age category
	ws_t_lt25 = SUM(CASE WHEN WP = 0 AND min_age<25 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	ws_m_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	ws_f_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	ws_t_25to39 = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	ws_m_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	ws_f_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	ws_t_40to54 = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	ws_m_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	ws_f_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	ws_t_55to64 = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	ws_m_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	ws_f_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	ws_t_65p = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	ws_m_65p = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	ws_f_65p = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female FTEs worked by age category
	fte_t_lt25 = SUM(CASE WHEN WP = 0 AND min_age<25 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	fte_m_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	fte_f_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	fte_t_25to39 = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	fte_m_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	fte_f_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	fte_t_40to54 = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	fte_m_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	fte_f_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	fte_t_55to64 = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	fte_m_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	fte_f_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	fte_t_65p = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	fte_m_65p = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	fte_f_65p = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN fte_employee_av_imp ELSE 0 END) -- hc-mon of non-WP employees, females, aged 65+

FROM #emp_emp_yr3 P

GROUP BY P.pent, financial_year






-20-------------------------------------------------------------------------------------------------------------
-- Part II: age, b version (non-March firms only)

-- save as csv file Wgap_v3_pent_yr_age2.csv

SELECT DISTINCT  
	P.pent,
	year = P.dim_year_key,

	-----------------------------------------
	-- head counts for non-WP employees

	-- total, male, and female head counts by age category
	hc_t_lt25 = SUM(CASE WHEN WP = 0 AND min_age<25 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	hc_m_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	hc_f_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	hc_t_25to39 = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	hc_m_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	hc_f_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	hc_t_40to54 = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	hc_m_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	hc_f_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	hc_t_55to64 = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	hc_m_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	hc_f_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	hc_t_65p = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN mnths_wkd ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	hc_m_65p = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	hc_f_65p = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN mnths_wkd ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female head counts by age category of full-time (1 FTE) workers
	hc_t_lt25_ft = SUM(CASE WHEN WP = 0 AND min_age<25 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	hc_m_lt25_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	hc_f_lt25_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	hc_t_25to39_ft = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	hc_m_25to39_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	hc_f_25to39_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	hc_t_40to54_ft = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	hc_m_40to54_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	hc_f_40to54_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	hc_t_55to64_ft = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	hc_m_55to64_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	hc_f_55to64_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	hc_t_65p_ft = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN mnths_wkd_ft ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	hc_m_65p_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	hc_f_65p_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN mnths_wkd_ft ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female head counts by age category of part-time (<1 FTE) workers
	hc_t_lt25_pt = SUM(CASE WHEN WP = 0 AND min_age<25 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	hc_m_lt25_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	hc_f_lt25_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	hc_t_25to39_pt = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	hc_m_25to39_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	hc_f_25to39_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	hc_t_40to54_pt = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	hc_m_40to54_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	hc_f_40to54_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	hc_t_55to64_pt = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	hc_m_55to64_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	hc_f_55to64_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	hc_t_65p_pt = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN mnths_wkd_pt ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	hc_m_65p_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	hc_f_65p_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN mnths_wkd_pt ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female wage/salary paid by age category
	ws_t_lt25 = SUM(CASE WHEN WP = 0 AND min_age<25 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	ws_m_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	ws_f_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	ws_t_25to39 = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	ws_m_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	ws_f_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	ws_t_40to54 = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	ws_m_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	ws_f_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	ws_t_55to64 = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	ws_m_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	ws_f_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	ws_t_65p = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN gross_earn_yr ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	ws_m_65p = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	ws_f_65p = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN gross_earn_yr ELSE 0 END), -- hc-mon of non-WP employees, females, aged 65+

	-- total, male, and female FTEs worked by age category
	fte_t_lt25 = SUM(CASE WHEN WP = 0 AND min_age<25 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, aged <25
	fte_m_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age<25 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, males, aged <25
	fte_f_lt25 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age<25 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, females, aged <25
	fte_t_25to39 = SUM(CASE WHEN WP = 0 AND min_age>=25 AND min_age<40 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [25,40)
	fte_m_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=25 AND min_age<40 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged [25,40)
	fte_f_25to39 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=25 AND min_age<40 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, females, aged [25,40)
	fte_t_40to54 = SUM(CASE WHEN WP = 0 AND min_age>=40 AND min_age<55 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [40,55)
	fte_m_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=40 AND min_age<55 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged [40,55)
	fte_f_40to54 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=40 AND min_age<55 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, females, aged [40,55)
	fte_t_55to64 = SUM(CASE WHEN WP = 0 AND min_age>=55 AND min_age<65 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged [55,65)
	fte_m_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=55 AND min_age<65 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged [55,65)
	fte_f_55to64 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=55 AND min_age<65 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, females, aged [55,65)
	fte_t_65p = SUM(CASE WHEN WP = 0 AND min_age>=65 THEN fte_employee_av_imp ELSE 0 END), -- hc-months of non-WP employees, both genders, aged 65+
	fte_m_65p = SUM(CASE WHEN WP = 0 AND sex = 'M' AND min_age>=65 THEN fte_employee_av_imp ELSE 0 END), -- hc-mon of non-WP employees, males, aged 65+
	fte_f_65p = SUM(CASE WHEN WP = 0 AND sex = 'F' AND min_age>=65 THEN fte_employee_av_imp ELSE 0 END) -- hc-mon of non-WP employees, females, aged 65+

FROM #emp_emp_yr3b P

GROUP BY P.pent, P.dim_year_key







-21-------------------------------------------------------------------------------------------------------------
-- part V: tenure

-- save as csv file Wgap_v3_pent_yr_ten.csv 


SELECT DISTINCT  
	P.pent,
	year,

	-- total, male and female headcounts by tenure
	hc_t_ten0 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_m_ten0 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_f_ten0 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_t_ten1 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_m_ten1 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_f_ten1 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_t_ten2 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	hc_m_ten2 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	hc_f_ten2 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs

	-- total, male and female headcounts by tenure, full time
	hc_t_ten0_ft = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_m_ten0_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_f_ten0_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_t_ten1_ft = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_m_ten1_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_f_ten1_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_t_ten2_ft = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	hc_m_ten2_ft = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	hc_f_ten2_ft = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd_ft ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs

	-- total, male and female headcounts by tenure, part time
	hc_t_ten0_pt = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_m_ten0_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_f_ten0_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	hc_t_ten1_pt = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_m_ten1_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_f_ten1_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	hc_t_ten2_pt = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	hc_m_ten2_pt = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	hc_f_ten2_pt = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN mnths_wkd_pt ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs

	-- total, male and female wage/salary paid by tenure
	ws_t_ten0 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	ws_m_ten0 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	ws_f_ten0 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	ws_t_ten1 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	ws_m_ten1 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	ws_f_ten1 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	ws_t_ten2 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	ws_m_ten2 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	ws_f_ten2 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN gross_earn_yr ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs

	-- total, male and female FTEs worked by tenure
	fte_t_ten0 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 0 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	fte_m_ten0 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 0 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	fte_f_ten0 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 0 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, not at pent last yr
	fte_t_ten1 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	fte_m_ten1 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	fte_f_ten1 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 0 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last yr but not prev yr
	fte_t_ten2 = SUM(CASE WHEN WP = 0 AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	fte_m_ten2 = SUM(CASE WHEN WP = 0 AND sex = 'M' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN fte_employee_av_imp ELSE 0 END), -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
	fte_f_ten2 = SUM(CASE WHEN WP = 0 AND sex = 'F' AND wkd_p_1ya = 1 AND wkd_p_2ya = 1 THEN fte_employee_av_imp ELSE 0 END) -- headcount-months of non-WP employees, both genders, at pent last 2 yrs
		
FROM #emp_emp_yr3_merge P
GROUP BY P.pent, year












--------------------------------------------
-------------------------------------------------------------------------------------------------------------
-- Firm multiplant indicator at the pent-March year and pent-own financial year level
-------------------------------------------------------------------------------------------------------------



-22-------------------------------------------------------------------------------------------------------------
-- table at the pent-month level with range of pbns and balance month

CREATE TABLE #pent_mon_mult(	pent					CHAR(10)		NOT NULL,
				dim_month_key				INT			NOT NULL,
				dim_year_key				INT			NOT NULL,
				calendar_year				SMALLINT		NOT NULL,
				March_year				SMALLINT		NOT NULL,
				min_pbn					CHAR(10)		NOT NULL,
				max_pbn					CHAR(10)		NOT NULL,
				multipl_mon				TINYINT			NOT NULL,	-- dummy for multiple pbns that month
				bal_mon					TINYINT			NOT NULL
				
	PRIMARY KEY CLUSTERED (pent, dim_month_key))

INSERT INTO #pent_mon_mult(pent, dim_month_key, dim_year_key, calendar_year, March_year, min_pbn, max_pbn,
	multipl_mon, bal_mon)

SELECT 
	INS.pent, INS.dim_month_key, BAL2.dim_year_key, INS.calendar_year, INS.March_year, INS.min_pbn, INS.max_pbn, INS.multipl_mon, INS.bal_mon
FROM
	(SELECT DISTINCT
		MP.pent,
		MP.dim_month_key,
		calendar_year = FLOOR(MP.dim_month_key/100),
		March_year = CASE WHEN (MP.dim_month_key - 100*FLOOR(MP.dim_month_key/100))<=3 THEN FLOOR(MP.dim_month_key/100)
			ELSE FLOOR(MP.dim_month_key/100) + 1 END, -- labelled by calendar year at end
		min_pbn = MIN([min_pbn_nbr]), -- minimum pbn identifier to gen multiplant indicator
		max_pbn = MAX([max_pbn_nbr]), -- maximum pbn identifier to gen multiplant indicator
		multipl_mon = CASE WHEN MAX([max_pbn_nbr])!=MIN([min_pbn_nbr]) THEN 1 ELSE 0 END, -- dummy for multiple pbns in month
		bal_mon = MAX(balance_month_nbr) -- pent balance month
	FROM [IDI_Sandpit].[clean_read_IR].[pent_emp_mth_FTE_IDI_20181020_RFabling] MP
	LEFT JOIN [IDI_Sandpit].[clean_read_IR].[pent_bal_date_IDI_20181020_RFabling] BAL
		ON MP.pent = BAL.pent
	GROUP BY MP.pent, dim_month_key, [balance_month_nbr]) INS
LEFT JOIN IBULDD_PROD_CLEAN.[ibuldd_clean_archive_dec_2014].[dbo].[dim_bal_date_year] BAL2
	ON INS.bal_mon = BAL2.bal_month_nbr
WHERE INS.dim_month_key>=BAL2.[start_month_key] 
	AND INS.dim_month_key<=BAL2.[end_month_key]


-- 25,438,XXX rows






-23-------------------------------------------------------------------------------------------------------------
-- collapsing to an obs each pent-March_year (all pents) or each pent-firm_financial_year (pent where balance month is not March only)

-- save as Wgap_v3_loc.csv


CREATE TABLE #pent_yr_loc(	pent				CHAR(10)		NOT NULL,
				year				INT			NOT NULL,
				multiplant			TINYINT			NOT NULL
						
	PRIMARY KEY CLUSTERED (pent, year))

INSERT INTO #pent_yr_loc(pent, year, multiplant)
SELECT DISTINCT 
	pent,
	year = March_year,
	multiplant = CASE WHEN SUM(multipl_mon)>=2 THEN 1 ELSE 0 END -- dummy for multiple pbns in at least 2 months of the year
	
FROM #pent_mon_mult
GROUP BY pent, March_year


INSERT INTO #pent_yr_loc(pent, year, multiplant)
SELECT DISTINCT 
	pent,
	year = dim_year_key,
	multiplant = CASE WHEN SUM(multipl_mon)>=2 THEN 1 ELSE 0 END -- dummy for multiple pbns in at least 2 months of the year
	
FROM #pent_mon_mult
WHERE bal_mon != 3
GROUP BY pent, dim_year_key


-- 4,034,XXX rows











-24-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-- creating  Wgap_v3_fpers.csv at person-pent-March year level: how many months person worked for pent 


-- save as Wgap_v3_fpers.csv

SELECT DISTINCT 
	snz_uid,
	pent,
	March_yr,
	mon_wkd = count(month)
FROM
	(SELECT DISTINCT 
		EMS.snz_uid,
		pent,
		year = YEAR(EMS.[ir_ems_return_period_date]),
		month = MONTH(EMS.[ir_ems_return_period_date]),
		March_yr = CASE WHEN MONTH(EMS.[ir_ems_return_period_date])<=3 THEN YEAR(EMS.[ir_ems_return_period_date])
			WHEN MONTH(EMS.[ir_ems_return_period_date])>3 THEN YEAR(EMS.[ir_ems_return_period_date]) + 1 END
	FROM [ir_clean].[ird_ems] EMS
	LEFT JOIN IDI_Sandpit.clean_read_IR.pent_IDI_20181020_RFabling PENT
		ON EMS.ir_ems_enterprise_nbr = PENT.enterprise_nbr
	WHERE (100*YEAR(EMS.[ir_ems_return_period_date]) + MONTH(EMS.[ir_ems_return_period_date]))>=PENT.start_month
		AND (100*YEAR(EMS.[ir_ems_return_period_date]) + MONTH(EMS.[ir_ems_return_period_date]))<=PENT.end_month
	GROUP BY EMS.snz_uid, pent, YEAR(EMS.[ir_ems_return_period_date]), MONTH(EMS.[ir_ems_return_period_date])) MON
GROUP BY snz_uid, pent,	March_yr





